# load libraries
library(tidyverse)
library(cobalt)
library(hrbrthemes)
library(paletteer)
library(WeightIt)
library(stargazer)
library(here)
library(survey)
library(broom)

source("replication/00-funcs.R")

# read data
clean_df = read_rds(here("replication", "data", "ipw-data.rds"))




# ipw -------------------------------------------------------------------------------


# define outcomes
outcomes = read_csv("replication/data/outcome_dictionary.csv")

# define covariates
dict = read_csv("replication/data/covar_justification.csv")



# create indices


## helper function
# takes string of dvs separated by comma and spits out vector of dvs
split_dvs = function(x)
{
  temp = x %>% 
    str_split(",") %>% 
    unlist()
  
  return(temp)
}

standardize = function(x)
{
  temp = mutate(x, across(-r_id, ~as.numeric(scale(as.numeric(.))))) %>% 
    # take the average across columns
    rowwise() %>% 
    mutate(index = mean(c_across(-r_id), na.rm = TRUE)) %>% 
    # ungroup to avoid errors with rowwise()
    ungroup() %>% 
    select(r_id, index)
  
  return(temp)
}


indices = outcomes %>% 
  filter(!str_detect(dirty, "index")) %>% 
  # collapse outcome variables into a vector
  group_by(group) %>% 
  summarise(dvs = str_c(dirty, collapse = ",")) %>% 
  mutate(dvs = map(dvs, split_dvs)) %>% 
  # add data in
  mutate(data = list(clean_df)) %>% 
  # subset data down to relevant outcomes
  mutate(data = map2(.x = dvs, .y = data, .f = ~ select(.y, r_id, all_of(.x)))) %>% 
  # create averaged z-score
  mutate(index = map(data, standardize)) %>% 
  unnest(index) %>% 
  select(group, r_id, index) %>% 
  pivot_wider(names_from = "group", values_from = "index", names_glue = "{group}_index")



clean_df = clean_df %>% 
  left_join(indices, by = "r_id")

# subset down to key vars
match_df = clean_df %>% 
  select(r_id, displace, man, age_num, hhsize, indrural, lpop, lrdp_num,
         read, race_bin, land_owned,
         discapital, nbi, laltura, homicidios, AUC_, FARC_, ELN_, 
         desplazados_expulsion, other_victim) %>% 
  drop_na()




# weightit ----------------------------------------------------------------




## weightit
out = weightit(displace ~ man + age_num + hhsize + read + race_bin + 
                 indrural + lpop + discapital + lrdp_num +
                 nbi + laltura + homicidios + AUC_ + FARC_ +
                 desplazados_expulsion + other_victim + land_owned,
               data = match_df, method = "cbps")

## note: stabilize = TRUE returns almost identical results




# output distribution of weights
ess = summary(out)$effective.sample.size %>% as_tibble(rownames = "Type")
var = summary(out)$coef.of.var %>% tibble()


out_tab = tibble(Status = c("Treated", "Control"), 
       `Coef. of variation` = as.numeric(summary(out)$coef.of.var), 
       `MAD` = as.numeric(summary(out)$scaled.mad),
       `Entropy` = as.numeric(summary(out)$negative.entropy), 
       `ESS (unweighted)` = rev(as.numeric(summary(out)$effective.sample.size[1,])), 
       `ESS (weighted)` = rev(as.numeric(summary(out)$effective.sample.size[2,])))


out_tab %>% 
  mutate(across(where(is.double), ~round(., 2))) %>% 
  stargazer::stargazer(summary = FALSE, rownames = FALSE, 
                       out = here("replication", "figures", "tab-sum.tex"),
                       title = "Summary statistics for weights.", 
                       label = "tab:sum", style = "APSR")



# check for balance
pdat = 
  love.plot(out)$data %>% tibble() %>% 
  left_join(dict, by = c("var" = "dirty")) %>% 
  mutate(clean = factor(clean), 
         clean = fct_relevel(clean, "Propensity score (distance)", after = Inf))


ggplot(pdat, aes(x = clean, y = stat, color = Sample, group = clean)) + 
  coord_flip() + 
  geom_line(linewidth = 1, color = "black", alpha = .9) + 
  geom_point(size = 3, alpha = .9) + 
  scale_color_manual(values = c(palette[1], palette[7])) + 
  theme(legend.position = "bottom", 
        panel.grid.minor = element_blank()) + 
  labs(x = NULL, y = "Standardized differences between treatment and control", title = "Pre- and Post-Weighting Balance",
       subtitle = "Weights generated via Covariate Balancing Propensity Score (CBPS)",
       color = "Statistic:")

ggsave("replication/figures/balance.pdf", device = cairo_pdf)



# add weights back to data for analysis
match_df$weights = out$weights
match_df = 
  left_join(match_df, dplyr::select(clean_df, r_id, all_of(outcomes$dirty))) %>% 
  # add muni back in for clustering
  left_join(select(clean_df, r_id, municipality), by = "r_id")




# models
forms = crossing(outcomes = outcomes$dirty) %>% 
  mutate(controls = paste(c("desplazados_expulsion", "FARC_", "land_owned"), 
                          collapse = " + ")) %>% 
  mutate(f = paste0(outcomes, " ~ ", paste0("displace", " + ", controls)))


# define survey design

## no clusters, ipw weights
dw = svydesign(~ 1, weights = match_df$weights, data = match_df)

## clusters, ipw weights
dw_cl = svydesign(~ municipality, weights = match_df$weights, data = match_df)



# estimate models
results = forms %>% 
  mutate(model = map(.x = f, .f = ~svyglm(formula(.x), design = dw))) %>% 
  mutate(tidy = map(model, ~tidy(.x, conf.int = TRUE)))


clustered_results = forms %>% 
  mutate(model = map(.x = f, .f = ~svyglm(formula(.x), design = dw_cl))) %>% 
  mutate(tidy = map(model, ~tidy(.x, conf.int = TRUE)))



# plot results
results %>% 
  unnest(tidy) %>% 
  filter(term == "displace") %>% 
  left_join(outcomes, by = c("outcomes" = "dirty")) %>% 
  mutate(clean = str_to_sentence(clean), 
         group = str_to_sentence(group)) %>% 
  ggplot(aes(x = clean, y = estimate, ymin = conf.low, 
             ymax = conf.high, color = p.value < .05)) + 
  geom_pointrange(size = .7) + 
  coord_flip() +
  facet_wrap(vars(group), scales = "free_y", ncol = 1) + 
  geom_hline(yintercept = 0, lty = 2) + 
  theme(legend.position = "none", 
        panel.grid.minor = element_blank(), 
        axis.text.y = element_text(size = 10), 
        strip.text = element_text(face = "bold", color = "white",
                                  size = 12, 
                                  hjust = 0), 
        strip.background = element_rect(fill = palette[8])) + 
  labs(y = NULL, 
       x = NULL, color = latex2exp::TeX('p $<$ .05:'),
       title = "Primary Results on Effect of Displacement",
       subtitle = "Coefficient estimates based on estimated IPW weights") + 
  scale_color_manual(values = c("gray", palette[8]))

ggsave(here("replication", "figures", "estimates.pdf"), 
       device = cairo_pdf, width = 8, height = 12)


# plot results
bind_rows(mutate(results, ses = "No clustering"), 
          mutate(clustered_results, ses = "Clustered")) %>% 
  unnest(tidy) %>% 
  filter(term == "displace") %>% 
  left_join(outcomes, by = c("outcomes" = "dirty")) %>%
  mutate(clean = str_to_sentence(clean), 
         group = str_to_sentence(group)) %>% 
  #mutate(clean = str_replace(clean, "\\(", "\n(")) %>% 
  ggplot(aes(x = clean, y = estimate, ymin = conf.low, 
             ymax = conf.high, color = ses)) + 
  geom_pointrange(size = .7, position = position_dodge(width = 0.5)) + 
  coord_flip() +
  facet_wrap(vars(group), ncol = 1, scales = "free_y") + 
  geom_hline(yintercept = 0, lty = 2) + 
  theme(legend.position = "top", 
        panel.grid.minor = element_blank(), 
        axis.text.y = element_text(size = 10), 
        strip.text = element_text(face = "bold", color = "white",
                                  size = 12, 
                                  hjust = 0), 
        strip.background = element_rect(fill = palette[8])) + 
  labs(y = NULL, 
       x = NULL,
       title = "Primary Results on Effect of Displacement",
       color = "Standard errors:",
       subtitle = "Coefficient estimates based on estimated IPW weights") + 
  scale_color_manual(values = c(palette[1], palette[8]))

ggsave(here("replication", "figures", "estimates-compare-ses.pdf"), 
       device = cairo_pdf, width = 8, height = 12)




# adaptive BH ----------------------------------------------------------------

# function
get_bh = function(x)
{
  return(mutoss::adaptiveBH(x, alpha = .05)$adjPValues)
}


# all outcomes
bh_test = results |> 
  unnest(tidy) |> 
  filter(term == "displace") |> 
  mutate(bh = get_bh(p.value)) |> 
  select(outcomes, p.value, bh) |> 
  select(Outcomes = outcomes, `original p-value` = p.value, 
         `Adaptive BH p-values` = bh) |> 
  mutate(across(where(is.double), round, 3))

stargazer(bh_test, title = "Multiple comparison testing for all outcomes.", 
          label = "mult", summary = FALSE,
          type = "latex",
          out = here::here("replication", "figures", "multi-all.tex"))


# indices
bh_test = results |> 
  unnest(tidy) |> 
  filter(term == "displace") |> 
  filter(str_detect(outcomes, "index")) |>
  mutate(bh = get_bh(p.value)) |> 
  select(outcomes, p.value, bh) |> 
  select(Outcomes = outcomes, `original p-value` = p.value, 
         `Adaptive BH p-values` = bh) |> 
  mutate(across(is.double, round, 3))
  
stargazer(bh_test, title = "Multiple comparison testing.", 
            label = "mult", summary = FALSE,
            column.labels = c("Cohesion", "Land", "Psychological", 
                              "Trust", "Welfare"),
              type = "latex",
          out = here::here("replication", "figures", "multi-index.tex"))

# truncate extreme weights ------------------------------------------------



# truncate extreme weights
ggplot(match_df, aes(x = weights)) + 
  geom_histogram(bins = 50, color = "white", fill = "#003049", alpha = .8) + 
  labs(x = "Estimated probability of treatment (weights)",
       y = NULL) + 
  theme(axis.text.y = element_blank(), 
        panel.grid.minor = element_blank())
ggsave(here("replication", "figures", "weight-dist.pdf"), device = cairo_pdf)


# define survey design
dw = svydesign(~ 1, weights = match_df$weights[match_df$weights < quantile(match_df$weights, .95)], 
               data = filter(match_df, weights < quantile(match_df$weights, .95))) # filter out 95% percentile weights


# estimate models
results = forms %>% 
  mutate(model = map(.x = f, .f = ~svyglm(formula(.x), design = dw))) %>% 
  mutate(tidy = map(model, ~tidy(.x, conf.int = TRUE)))



# plot results
results %>% 
  unnest(tidy) %>% 
  filter(term == "displace") %>% 
  left_join(outcomes, by = c("outcomes" = "dirty")) %>% 
  mutate(clean = str_to_sentence(clean), 
         group = str_to_sentence(group)) %>% 
  ggplot(aes(x = clean, y = estimate, ymin = conf.low, 
             ymax = conf.high, color = p.value < .05)) + 
  geom_pointrange(size = .7) + 
  coord_flip() +
  facet_wrap(vars(group), scales = "free_y", ncol = 1) + 
  geom_hline(yintercept = 0, lty = 2) + 
  theme(legend.position = "none", 
        panel.grid.minor = element_blank(), 
        axis.text.y = element_text(size = 10), 
        strip.text = element_text(face = "bold", color = "white",
                                  size = 12, 
                                  hjust = 0), 
        strip.background = element_rect(fill = palette[8])) + 
  labs(y = NULL, 
       x = NULL, color = latex2exp::TeX('p $<$ .05:'),
       title = "Primary Results on Effect of Displacement",
       subtitle = "Coefficient estimates based on estimated IPW weights") + 
  scale_color_manual(values = c("gray", palette[8]))


ggsave(here("replication", "figures", 
            "estimates-truncated.pdf"), device = cairo_pdf,
       height = 12, width = 8)



# robustness to other methods -------------------------------------------------------

# ipw robustness to other algorithms
robust_weights = crossing(method = c("gbm", "cbps", "ebal", "energy")) %>% 
  mutate(weight_mods = map(.x = method, ~weightit(displace ~ man + age_num + hhsize + read + 
                                                    indrural + lpop + discapital + lrdp_num +
                                                    nbi + laltura + homicidios + AUC_ + FARC_ + 
                                                    desplazados_expulsion + other_victim + land_owned,
                                                  data = match_df,
                                                  method = .x))) %>% 
  mutate(balance = map(weight_mods, bal.tab)) %>% 
  mutate(weights = map(weight_mods, ~pluck(.x, "weights")))


# check that weights are being extracted correctly...
for(ii in 1:4)
{
  p = all(robust_weights$weight_mods[[ii]]$weights == robust_weights$weights[[ii]])
  print(p)
}



# robust models
robust_models = robust_weights %>% 
  mutate(model = map(weights, ~lm(together ~ displace, data = match_df, weights = .x))) %>% 
  mutate(tidy = map(model, tidy))


# , and that weighted estimates are correct:
for(i in 1:4)
{
  p = all(coef(robust_models$model[[i]]) == coef(lm(together ~ displace, data = match_df, weights = robust_models$weights[[i]])))
  print(p)
}


# now do it for all the outcomes
robust_results = crossing(robust_weights, 
                          outcomes = outcomes$dirty) %>% 
  mutate(controls = paste(c("desplazados_expulsion", "FARC_", "land_owned"), collapse = " + ")) %>% 
  mutate(f = paste0(outcomes, " ~ ", paste0("displace", " + ", controls))) %>% 
  mutate(dw = map(weights, ~svydesign(~ 1, weights = .x, data = match_df))) %>% 
  mutate(model = map2(.x = f,.y = dw, .f = ~svyglm(formula(.x), design = .y))) %>% 
  mutate(tidy = map(model, ~tidy(.x, conf.int = TRUE)))



robust_results %>% 
  unnest(tidy) %>% 
  filter(term == "displace") %>% 
  mutate(method = case_when(method == "cbps" ~ "Covariate Balancing Propensity Score", 
                            method == "ebal" ~ "Entropy balancing", 
                            method == "gbm" ~ "Propensity score (gbm)", 
                            method == "energy" ~ "Energy balancing")) %>% 
  left_join(outcomes, by = c("outcomes" = "dirty")) %>% 
  ggplot(aes(x = method, y = estimate, ymin = conf.low, 
             ymax = conf.high, color = method)) + 
  geom_pointrange() + 
  facet_wrap(vars(outcomes), scales = "free_y") + 
  theme(axis.text.x = element_blank(), 
        legend.position = "top", 
        panel.grid.minor = element_blank(), 
        axis.ticks.x = element_blank()) + 
  labs(x = NULL,
       y = "Estimated effect of displacement", 
       color = "Weighting approach:") + 
  scale_color_viridis_d(option = "magma", end = .7) + 
  guides(color = guide_legend(nrow = 2)) + 
  geom_hline(yintercept = 0, lty = 2)

ggsave(here("replication", "figures", "robust-algo.pdf"), 
       device = cairo_pdf)

